* NOTE: if Stata does not do so automatically, cd to the top level directory of the data depository (where this script is stored)
* requires user-written package grstyle (from SSC)

log using experimental_analysis, replace
use data\maindata.dta, clear
estimates clear

cap program drop stratFisher
program define stratFisher, rclass // this program performs a Fisher exact test on a 2x2 table when data are drawn from strata and thus should only be permuted within strata (see Jung 2004)
	syntax varname (numeric) [if], FACtor(varname) STRATifiedby(varlist) // "varname" is the outcome variable. It and the factor must be binary 0/1, with one being success and treated, respectively
	preserve
	qui {
		* prep data
		if "`if'"!="" keep `if'
		egen factor = group(`factor') // in case factor variable isn't coded 0/1, we generate one that is
		replace factor = factor-1 // replaces previous line's output 1/2 with 0/1
		assert inlist(`varlist',0,1) & inlist(factor,0,1) // verifies that outcome variable and factor variable are both binary
		
		* convert to count data
		count if `varlist' & factor // number of successes when factor = 1 ...
		scalar S = r(N) // ... stored in S: the reference statistic that will be compared against the empirical distribution
		collapse (count) n=`varlist' (sum) z=`varlist' m=factor, by(`stratifiedby') // transforming ind'l obs into count data for each stratum: # obs. (n), # successes (z), # treated (m)
		gen j = _n // stratum identifier
		scalar J = _N // number of strata
		
		* expand to one observation per possible allocation of the "successes" between levels of factor WITHIN each stratum, and calculate its probability
		gen m_minus = max(0,z-n+m)
		gen m_plus = min(z,m)
		expand m_plus-m_minus+1
		bys j: gen i = m_minus+_n-1
		gen double f = hypergeometricp(n,z,m,i) // f is the pmf of drawing i "successes" when picking m "treated" obs. at random from n obs. with z successes (within stratum)
		recode f (.=1) // Stata generates missing values when there is only one possible allocation of successes
		keep j i f
		compress
		tempfile fulldata
		save `fulldata'
		
		* separate the strata and calculate the joint pmf f_ and aggreggate successes (for "treated" obs.) I for all combinations of strata outcomes
		forvalues j=2/`=J' {
			use `fulldata' if j==`j', clear
			drop j
			tempfile file`j'
			save `file`j''
			}
		use `fulldata' if j==1
		drop j
		rename (i f) (I f_)
		forvalues j=2/`=J' {
			cross using `file`j''
			replace I = I+i
			replace f_ = f_*f // f_ is product up to now; f is density of new strata's value
			collapse (sum) f_, by(I)
			}
		
		* compare S to the empirical distribution of I
		gen gr = (I>S)*f_
		gen eq = (I==S)*f_
		collapse (sum) gr eq f_
		assert float(f_)==1 // summing all probabilities must yield 1
		gen p = eq + min(gr,1-eq-gr)
	}
	return scalar p = min(1,2*p[1]) // 2*p could be >1 if I=S has large mass
	di "number of successes " S ", two-sided p = " return(p)
	restore
end


/***********************************************************************
		Figures
***********************************************************************/

*** figure 3 (new) ***

graph drop _all
set scheme s1color
grstyle init
grstyle set color cblind

* left panel: defendant and precedent
preserve
	tempfile defendant
	statsby mean=r(mean) lb=r(lb) ub=r(ub), by(def_nat  ) saving(`defendant')	: ci proportions guilty
	statsby mean=r(mean) lb=r(lb) ub=r(ub), by(precedent) clear					: ci proportions guilty
	append using `defendant'
	qui sum precedent
	scalar shift = r(max) + 2
	replace precedent = def_nationality + shift if mi(precedent)
	label define precedent 0 "{bf:By Precedent}" `=shift' "{bf:By Defendant}" `=shift+"sympathetic":nationality' "sympathetic" `=shift+"unsympathetic":nationality' "unsympathetic", add
	twoway (dot mean precedent, horizontal dotextend(no) ndots(0)) (rspike ub lb precedent, horizontal) , ///
			ytitle("") yscale(reverse range(-1/8))  ylabel(0 1 2 3 5 6 7, valuelabel noticks angle(horizontal)) ///
			xtitle("Proportion of Convictions Affirmed") ///
			legend(order(0 "Placeholder Text!") region(lstyle(none)) color(none)) /// NB: the legend is invisible bc of color choice "none" -- included only for spacing
			note("Spikes indicate exact 95% c.i.s.") title("Rate of Affirming Conviction", span) subtitle("by Precedent or Defendant", span) /// 
			name(mainexperiment) graphregion(style(outline)) // "graphregion(style(outline))" draws a line around the graph to set it off from the second graph below
			
* right panel: anchor
local agtitles `"title("Distribution of Sentences") subtitle("by Anchor") graphregion(style(outline))"'
local aglabeling `"legend(rows(1) order(- "Anchor:" 1 "10" 2 "25" 3 "40"))  xtitle("Sentence (years)")"'
restore, preserve
	drop if sentence>50
	* option 1: kernel density
	twoway	(kdensity sentence if anchor==10, lpattern(solid)  lwidth(*2)) ///
			(kdensity sentence if anchor==25, lpattern(shortdash)  lwidth(*2)) ///
			(kdensity sentence if anchor==40, lpattern(dash)  lwidth(*2)) ///
			, `agtitles' `aglabeling' ///
			ylabel(none) ytitle("probability density", margin(r+1)) note("Excludes 8 sentences >50 years. Epanechnikov kernel.") name(anchor_kernel) 
	* option 2: "histogram"
	** 2.0: three plots with kernel density overlay
	twoway	(kdensity sentence, bwidth(3) area(10) recast(area) lcolor(gs12) fcolor(gs12)) (hist sentence, width(.5) start(.25) color(black)), ///
			by(anchor, cols(1) compact `agtitles' note("Graphs by anchor value. Exclude 8 sentences >50 years.")) ///
			legend(order(2 "Histogram" 1 "Kernel density (Epanechnikov(3))") symxsize(*.5) symysize(*.3) size(*.8) colgap(*.5)) ///
			plotr(m(zero)) ylabel(none) xsc(titlegap(-5) r(-1 51)) xlabel(0 10 25 40 50) xtitle("Sentence (years)") ytitle("Density") ///
			name(anchor_hist_kernel_3, replace)
	** 2.a: three plots
	contract anchor sentence, freq(frequency)
	twoway	spike frequency sentence, by(anchor, cols(1) compact `agtitles' note("Graphs by anchor value. Exclude 8 sentences >50 years.")) ///
			lwidth(*2) plotr(m(zero)) ysc(r(0 22)) ylabel(none) xsc(titlegap(-5) r(-1 51)) xlabel(0 10 25 40 50) xtitle("Sentence (years)") ///subtitle("Anchor =", prefix) adds line
			name(anchor_hist_3, replace)
	** 2.b: one plot
	replace sentence = sentence -.3 if anchor==10
	replace sentence = sentence +.3 if anchor==40
	twoway	(spike frequency sentence if anchor==10, lwidth(*1.5) lpattern(solid)) ///
			(spike frequency sentence if anchor==25, lwidth(*1.5) lpattern(shortdash)) ///
			(spike frequency sentence if anchor==40, lwidth(*1.5) lpattern(dash)) ///
			, `agtitles' `aglabeling' ///
			ylabel(0 20, notick labgap(0)) ytitle(, margin(r+1)) note("Excludes 8 sentences >50 years.") name(anchor_hist)
	
restore

graph combine mainexperiment anchor_hist_kernel_3, fysize(60) title("Fig. 3: Decisions by Experimental Treatment") saving(graphs/decisions_by_treatment, replace)
*/

*** figure S1 (Appendix: precedent/defendant by country) = old figure 3 ***

gen guilty2=guilty if def_nat=="unsympathetic":nationality
gen guilty3=guilty if def_nat=="sympathetic":nationality
graph dot guilty guilty2 guilty3, over(precedent, label(labsize(*.7))) over(country, gap(100)) ///
	marker(1, msymbol(O) mcolor(black)) marker(2, msymbol(dh) mcolor(blue) msize(*1.5)) marker(3, msymbol(sh) mcolor(orange_red) msize(*1.5)) ///
	legend(rows(1) order(- "Defendant:" 1 2 3) label(1 "both") label(2 "unsympathetic") label(3 "sympathetic") size(*.7)) ///
	linetype(line) lines(lwidth(*.5)) ///
	ytitle("Rate") title("Fig. S1: Affirmance Rates by Country (and Precedent and Defendant)", span size(*.9))  ///
	saving(graphs\affirmance_rates_by_country, replace)
	
	
*** Figure S2 (Appendix: anchor by country) = old figure 4 ***

graph hbox sentence if sentence<200, over(anchor, gap(*.3) label(labsize(*.7))) over(country) noouts ///
	intensity(0) medtype(cline) medline(lcolor(red) lwidth(*4)) cwhiskers lines(lpattern(dash)) ///
	marker(1, msymbol(o) mcolor(black) msize(*.5)) ///
	ytitle("Sentence (years)") title("Fig. S2: Sentences by Country (and Anchor)") ///
	note("Medians (red marker), 25th and 75th percentiles (box), and adjacent values (whiskers)." ///
	"Five U.S. outlier sentences (11019, 1540, 1040, 210, and 209 years) omitted.") ///
	saving(graphs\sentences_by_anchor_country, replace)


/***********************************************************************
		Statistical Tests
***********************************************************************/

******************************
*** Precedent and Defendant***

* univariate
local factors def_nationality precedent 
foreach factor in `factors' {
	foreach r in "1" "canceled!=1" "misunderstanding!=1" "inrange(prec_mentioned_gen,2,4)" { // the first one ("1") is an always-true placeholder to make "if" syntanx work with unrestricted tests
		if "`r'"!="1" di _newline "restriction: `r'"
		tab guilty `factor' if `r', column nofreq exact nolog
		if "`factor'"=="def_nationality" {
			qui stratFisher guilty if `r', fac(`factor') strat(country `: list factors - factor')
			di %5.3f r(p) " - stratified Fisher exact p"
			}
		else {
			forvalues excprec=1/3 { // stratFisher can only deal with two levels of the factor so need to exclude one
				qui stratFisher guilty if `r' & precedent!=`excprec' , fac(`factor') strat(country `: list factors - factor')
				di %5.3f r(p) " - stratified Fisher exact p, excluded precedent " `excprec'
				}
			}
		}
	}

spearman guilty precedent // takes into account ordering of precedent

* multivariate
anova guilty def_nat precedent

clogit guilty i`="sympathetic":nationality'.def_nat i.precedent, group(country) or
	test 2.precedent 3.precedent // joint test of precedent
	lincom i1.def_nat-3.precedent, or // OR estimate and 95% c.i. for going from sympathetic/REVERSE to unsympathetic/Affirm, or OR(Affirm/REVERSE)/OR(Unsympathetic/Sympathetic) 

gen reverse=precedent==2 // exlogistic can't handle factor variables
gen REVERSE=precedent==3
gen sympathetic = def_nat=="sympathetic":nationality
exlogistic guilty sympathetic reverse REVERSE, group(country) memory(2g) nolog terms(precedent = reverse REVERSE)
	exlogistic, test(score) // this calculates the p-value for the joint test of terms() in the previous line

***********************
*** Anchor ************

* preliminary: can't use parametric tests assuming normality
sktest sentence
sktest sentence if sentence<200
gen ln_sentence = ln(sentence)
sktest ln_sentence

* univariate
kwallis sentence, by(anchor) // = multisample generalization of the two-sample Wilcoxon (Mann–Whitney) rank-sum test
kwallis sentence if sentence<200, by(anchor)
spearman sentence anchor
spearman sentence anchor if sentence<200
pwcorr sentence anchor, sig
pwcorr sentence anchor if sentence<200, sig
bys country: spearman sentence anchor if sentence<200

* regressions (--> T. S.2)
xtset country
foreach anchorvar in anchor i.anchor {
	estimates drop _all
	_eststo: xtreg sentence i`="sympathetic":nationality'.def_nat i.precedent `anchorvar' if sentence<200, fe robust
		test 2.precedent 3.precedent
		estadd scalar joint_p = r(p)
	_eststo: qreg sentence i`="sympathetic":nationality'.def_nat i.precedent `anchorvar' i.country
		test 2.precedent 3.precedent
		estadd scalar joint_p = r(p)
	_eststo: xtreg ln_sentence i`="sympathetic":nationality'.def_nat i.precedent `anchorvar', fe robust
		test 2.precedent 3.precedent
		estadd scalar joint_p = r(p)
	esttab using tables\regressions_`anchorvar'.csv, ///
		drop(?.country _cons) order(anchor) noconstant nobaselevels ///
		scalars(joint_p) ci obslast b(%4.2f) ci(%4.2f) transform(anchor @*15 15) nostar label replace // note that anchor is recorded per 15 years
	}
	
log close